***************************************************************
* Title: rwanda_student_baseline_clean_jde.do
* Author: Todd Pugatch
* Last update: June 10 2024
* Description: data cleaning for Blimpo and Pugatch, "Entrepreneurship Education
*	and Teacher Training in Rwanda," Stage 2 Registered report, Journal of 
*	Development Economics
*
* Inputs: 	Student_Survey_cleaned_NOPII.dta
*			teacher_baseline_clean_jde.dta
*			Student_Survey_cleaned_linked_NOPIIs.dta
*			Section14_Q1400-1401_Cleaned_V12.dta
*			rwanda_student_baseline_modify1_aux1.do
*
* Outputs: student_baseline_clean_jde.dta
* Notes: cleans student baseline survey
****************************************************************

* Set environment 
* (the start time is stored so that start and end times can be displayed when the file finishes)
local start=`"$S_TIME"'
clear
clear matrix
clear mata
graph drop _all
program drop _all
cap log close
set more off

* Set directories 
* Every path below is built from the global macro main. Uncomment and edit the next line,
* or define main in a calling do-file, before running.
*global main "[SET MAIN DIRECTORY HERE]"
/*stop with a clear message if main is undefined; otherwise all paths would point to the filesystem root*/
if `"$main"'=="" {
	di as error "global main is not defined: set the project's main directory before running this do-file"
	exit 198
}
	global rawdata "$main/01_data/01_raw"
	global cleandata "$main/01_data/02_clean"
	global dofiles "$main/02_dofiles"
	global dataprep "$main/02_dofiles/00_dataprep"
	global results "$main/03_results"
	global temp "$main/04_output"

	
* define exchange rate to convert all variables in FRW into USD
local xrate=763.5759 /*exchange rate on 1 Mar 2016: http://www.exchangerates.org.uk/USD-RWF-exchange-rate-history-full.html*/

/******************************************************************
  					LOAD AND PREPARE DATA
variable naming and label conventions: 
	--names:	use [bl/el] suffix for baseline/endline
				omit if variable is an identifier that could be used to match
					across datasets, like treatment status or school code
	--labels: 	use B/E for baseline/endline 
				use [H/T/S]Q for head teacher/teacher/student questionnaire
	--example: enrollment, as reported by head teacher at baseline in item #123
		name: enroll_bl
		label: "BHQ123: enrollment"
*******************************************************************/

quietly use "$rawdata/student/Student_Survey_cleaned_NOPII.dta", clear
******************************************************
*													 *
* 		SECTION 1: identifying information			 *
*													 *
******************************************************
/*fix miscoded district, sector and cell for school 301135*/
quietly replace district=9 if school_code_106==301135
quietly replace sector=111 if school_code_106==301135
quietly replace cell=164 if school_code_106==301135

/*reassign school codes for schools 95 and 80 (update code for xxx, incorrectly labeled 605118).
	These run after the location fixes above and before the treatment fixes below, 
	both of which select on school_code_106.*/
replace school_code_106=405102 if school==95
replace school_code_106=506118 if school==80

/*reconcile treatment group with the sampling spreadsheet where the dataset disagrees*/
quietly replace group_107=2 if school_code_106==301112
quietly replace group_107=1 if inlist(school_code_106,403110,403037,403050,305012,504114)

/*attach school-level variables (public & strata indicators) from the cleaned teacher baseline file*/
local teachervars "public strata"

/*merge key; long format necessary because xxxx has school code longer than 7 digits*/
quietly generate long school_code=school_code_106
quietly save "$temp/studenttemp.dta", replace

/*reduce the teacher file to the merge key plus the variables to bring over*/
use "$cleandata/teacher_baseline_clean_jde.dta", clear
keep school_code `teachervars'
quietly save "$temp/teachertemp.dta", replace

/*many students per school: m:1 merge onto the student file*/
quietly use "$temp/studenttemp.dta", clear
quietly merge m:1 school_code using "$temp/teachertemp.dta", keepusing(`teachervars') force
list school if _merge!=3
/*discard records with no student-survey school code*/
drop if school_code_106==.
drop _merge

/*keep the baseline school identifier under a baseline-specific name*/
quietly generate schoolname_bl=school
label variable schoolname_bl "school name, as reported in baseline (school)"
label values schoolname_bl schoollab

/*create numeric student ID variable:
	Variable "uniqueid" is string variable of form: 0[school_code]st_[studentnumber]. Strip the "st_" 
	separator and convert to numeric, in hopes of matching with student id variable in endline.*/
qui gen uniqueid2=subinstr(uniqueid,"st_","",.)
qui destring uniqueid2, gen(studentid)	
format studentid %12.0g
drop uniqueid2

* check for duplicates of studentid
/*DECISION: assume that students with schoolid==504102 for xxx but whose unique id begins with 
	405102 (xxx) are actually enrolled in xxx. This is because there are 15 duplicate studentid's
	beginning with 405102 with a matching schoolid, but no other students with school_code==504102.*/
qui drop if studentid==.|school_code==.
duplicates report studentid
duplicates list studentid
/*tag on the full variable name: the abbreviation "student" only resolves to studentid while no other
	variable is named, or begins with, "student"*/
duplicates tag studentid, gen(dupes)
sort studentid school_code
/*dupes holds the number of other records sharing the studentid, so >0 lists every duplicated record 
	(==1 would skip IDs that appear three or more times)*/
list school_code schoolname_bl studentid if dupes>0
/*give the affected records in school 504102 an ID that begins with their own school code*/
forval i=1/15 {
	qui replace studentid=504102`i' if studentid==405102`i' & school_code==504102
}
	
/*Attach the baseline teacher ID (and same-teacher flag) to each student record. */
quietly save "$temp/studenttemp.dta", replace

quietly use "$rawdata/student/Student_Survey_cleaned_linked_NOPIIs.dta", clear
quietly destring uniqueid, generate(studentid)
format studentid %12.0g

* apply the same studentid correction for xxx & xxx as in the main student file
/*NOTE(review): this condition references school_code while the drop below uses school_code_106 --
	confirm which variable the linked file actually contains*/
forvalues n=1/15 {
	quietly replace studentid=504102`n' if studentid==405102`n' & school_code==504102
}
quietly drop if studentid==.|school_code_106==.

/*mark the linked variables as baseline measures and keep only what is merged back*/
foreach v in teacherid same_teacher {
	rename `v' `v'_bl
}
label variable teacherid_bl "teacherid, baseline"
keep studentid teacherid_bl same_teacher_bl
quietly save "$temp/linktemp.dta", replace

* merge onto the full baseline student data; records found only in the link file are discarded
quietly use "$temp/studenttemp.dta", clear
quietly merge 1:1 studentid using "$temp/linktemp.dta"
quietly drop if _merge==2
drop _merge
	
/*create district & province IDs to match Census/map codes*/
/*the n-th word of each list is the Census code assigned to survey code n; 
	survey codes outside the list stay missing*/
local district_codes "57 53 54 56 42 45 43 41 31 35 34"
quietly generate district_id=.
forvalues d=1/11 {
	local census : word `d' of `district_codes'
	quietly replace district_id=`census' if district==`d'
}
label variable district_id "District ID, Census code"

local province_codes "5 4 3" /*survey codes 1-3: Eastern, Northern, Western*/
quietly generate province_id=.
forvalues p=1/3 {
	local census : word `p' of `province_codes'
	quietly replace province_id=`census' if province==`p'
}
label variable province_id "Province ID, Census code"

/*treatment status*/
* list any student whose group differs from the most common group in his/her school
bysort school_code_106: egen modal_group=mode(group_107)
list school school_code_106 group_107 if group_107!=modal_group
drop modal_group

* CHECK IMBALANCE OF TREATMENT/CONTROL
/*treatment indicator: 1 if group_107==1, 0 for any other non-missing group, missing if group is missing*/
quietly generate treatment=(group_107==1) if group_107!=.
/*tabulate groups at the student level, then at the school level (one tagged record per school)*/
quietly egen school_tag=tag(school_code_106)
tabulate group_107, missing
tabulate group_107 if school_tag==1, missing
label define treatment 0 "control" 1 "treatment"
label values treatment treatment
drop school_tag

/*alternate versions of treatment (see "Manual Checking Report.docx" for more details)*/
* alternate version 1: code unassigned replacement schools as missing
quietly generate treatment_unassgn=treatment
quietly replace treatment_unassgn=. if inlist(school_code_106,305112,304102,403121,503118)
label variable treatment_unassgn "treatment status (unassigned replacement schools as missing)"
label values treatment_unassgn treatment

* alternative version 2: code schools with mismatched status between E! and initial assignment
/*the last school in each list (503014, 503104) is one of the additional mismatches*/
quietly generate treatment_educateassgnt=treatment
quietly replace treatment_educateassgnt=1 if inlist(school_code_106,301112,503014)
quietly replace treatment_educateassgnt=0 if inlist(school_code_106,403110,403037,503104)
label variable treatment_educateassgnt "treatment status (based on Educate! records, not initial assignment)"
label values treatment_educateassgnt treatment


************************************************************************
*											   						   *
* 	       SECTION 2: Student home life			   					   *
* 											   						   *
************************************************************************
/*not coded:	specific home location (q201)
				transportation to school (q204) */

/*each indicator is 1 when the stated response code is given and 0 otherwise (including missing)*/
quietly generate rural_bl=(loc_201==1)
label variable rural_bl "BSQ201: student's home in rural area"

quietly generate homeowner_bl=(house_201==2)
label variable homeowner_bl "BSQ201: student's family owns their home"

quietly generate boarding_bl=(board_day_202==1)
label variable boarding_bl "BSQ202: boarding student"

/*far from school if either the distance report (km) or the travel-time report (minutes) passes its threshold*/
quietly generate home_far_bl=((dist_min_203>=60 & dist_min_203!=.)|(dist_km_203>=10 & dist_km_203!=.))
label variable home_far_bl "BSQ203: home at least 10km or 1hr away"

* SECTION 3: Student SES
/*age, gender, parents*/
/*age is left missing when reported as -99*/
quietly generate age_bl=age_300 if age_300!=-99
label variable age_bl "BSQ300: age (-99 recoded to missing)"
quietly generate female=(gender_301==2)

quietly generate bothparents_bl=(parents_302==3)
label variable bothparents_bl "BSQ302: has both parents"
quietly generate motheronly_bl=(parents_302==2)
label variable motheronly_bl "BSQ302: has mother only"
quietly generate fatheronly_bl=(parents_302==1)
label variable fatheronly_bl "BSQ302: has father only"
quietly generate orphan_bl=(parents_302==4)
label variable orphan_bl "BSQ302: has neither biological parent"

/*household assets*/
/*dwelling materials and water source*/
quietly generate dirtfloor_bl=(floor_mat_303==1)
label variable dirtfloor_bl "BSQ303: floor of home made of earth/mud"
quietly generate roof_iron_bl=(roof_mat_304==3)
label variable roof_iron_bl "BSQ304: roof of home made of iron"
quietly generate roof_cement_bl=(roof_mat_304==5)
label variable roof_cement_bl "BSQ304: roof of home made of cement/concrete/clay/tiles"
quietly generate water_piped_bl=(water_305==1)
label variable water_piped_bl "BSQ305: piped water in household"
quietly generate water_unprotected_bl=inlist(water_305,6,7)
label variable water_unprotected_bl "BSQ305: unprotected water source in household"

/*durable goods: share of the 7 items owned, and first principal component of the 7 ownership items.
	The asset list is reused later in the file for the SES index.*/
local assets "assets_306_Radio assets_306_Television assets_306_Telephone assets_306_Refrigerator assets_306_Bicycle assets_306_Motorcycle assets_306_PrivateCar"
quietly egen assets_pct_bl=rowmean(`assets')
label variable assets_pct_bl "BSQ306: proportion of HH assets owned, of 7"
quietly pca `assets'
quietly predict assets_pc1_bl
label variable assets_pc1_bl "BSQ306: asset index (1st principal component)"

/*energy sources*/
quietly generate cook_wood_bl=(cook_307==1)
label variable cook_wood_bl "BSQ307: household cooking source is wood"
quietly generate light_electric_bl=inlist(light_308,1,2)
label variable light_electric_bl "BSQ308: household light source is electricity or generator"

/*parental & sibling occupations and education*/
/*occupation indicators for father and mother, from item 309*/
foreach x in fath moth {
	qui gen `x'_ag_bl=(occup_`x'er_309==1)
	qui gen `x'_business_bl=(occup_`x'er_309==2)
	qui gen `x'_pro_bl=(occup_`x'er_309==6)
}
/*mother works if her occupation code is between 1 and 6*/
qui gen moth_work_bl=(occup_mother_309>=1 & occup_mother_309<=6 & occup_mother_309!=.)
qui gen parent_business_bl=(fath_business_bl==1|moth_business_bl==1)
qui gen parent_pro_bl=(fath_pro_bl==1|moth_pro_bl==1)
/*education indicators for father and mother, from item 310; codes 3 and 4 both count as tertiary*/
foreach x in fath moth {
	qui gen `x'_primary_bl=(educ_`x'_310==1)
	qui gen `x'_secondary_bl=(educ_`x'_310==2)
	qui gen `x'_tertiary_bl=(educ_`x'_310==3|educ_`x'_310==4)
	qui gen `x'_primary_ormore_bl=(`x'_primary_bl==1|`x'_secondary_bl==1|`x'_tertiary_bl==1)
	qui gen `x'_secondary_ormore_bl=(`x'_secondary_bl==1|`x'_tertiary_bl==1)
}
/*same education coding for sibling or other household member*/
qui gen sib_secondary_bl=(educ_sibling_310==2)
qui gen sib_tertiary_bl=(educ_sibling_310==3|educ_sibling_310==4)
qui gen sib_secondary_ormore_bl=(sib_secondary_bl==1|sib_tertiary_bl==1)
foreach x in fath moth {
	lab var `x'_ag_bl "BSQ309: `x'er works in agriculture"
	lab var `x'_business_bl "BSQ309: `x'er works in business"
	lab var `x'_pro_bl "BSQ309: `x'er is professional/salaried"
	foreach s in primary secondary tertiary {
		lab var `x'_`s'_bl "BSQ310: `x'er completed `s'"
	}
	lab var `x'_primary_ormore_bl "BSQ310: `x'er completed primary or more"
	lab var `x'_secondary_ormore_bl "BSQ310: `x'er completed secondary or more"
}
lab var moth_work_bl "BSQ309: mother works"
lab var parent_business_bl "BSQ309: at least one parent in business"
lab var parent_pro_bl "BSQ309: at least one parent is professional/salaried"
lab var sib_secondary_bl "BSQ310: sibling or other in HH completed secondary"
/*the tertiary label belongs to sib_tertiary_bl; it previously overwrote the label of sib_secondary_bl*/
lab var sib_tertiary_bl "BSQ310: sibling or other in HH completed tertiary"
lab var sib_secondary_ormore_bl "BSQ310: sibling or other in HH completed secondary or tertiary"

/*SES index: first principal component of household assets, parents' education, 
	and indicator for parents in business or professional occupation.*/
/*build the parent-education part of the list: mother's three levels, then father's*/
local parent_educ ""
foreach p in moth fath {
	foreach lvl in primary secondary tertiary {
		local parent_educ "`parent_educ' `p'_`lvl'_bl"
	}
}
local ses "`assets' `parent_educ' parent_business_bl parent_pro_bl"
quietly pca `ses'
quietly predict ses_pc1_bl
label variable ses_pc1_bl "BSQ303-310: SES index (1st principal component of assets, parent education, parent occupation)"

/*household businesses*/
quietly generate HH_business_bl=(business_311==1)
label variable HH_business_bl "BSQ311: household member owns a business"
/*employment indicator is defined only for households reporting a business; missing otherwise*/
quietly generate HH_business_employs_bl=(bus_info_212bus_details_b__312>1 & bus_info_212bus_details_b__312!=.) if business_311==1
label variable HH_business_employs_bl "BSQ312: household business has more than 1 paid employee"

******************************************************
* 			SECTION 4: Academic background			 *
******************************************************
quietly generate repeat_S4_bl=inlist(repeating_402,2,3)
label variable repeat_S4_bl "BSQ402: repeating S4"

/*numeric items: values of -99 are left missing*/
quietly generate reptimes_bl=reptimes_403 if reptimes_403!=-99
label variable reptimes_bl "BSQ403: number of times repeated a class/level since starting school"

quietly generate S3_exam_bl=aggr_404 if aggr_404!=-99
label variable S3_exam_bl "BSQ404: Aggregate score on S3 exam"
/*flag for a missing aggregate score*/
quietly generate S3_exam_miss_bl=(S3_exam_bl==.)
label variable S3_exam_miss_bl "BSQ404: missing value for S3 aggregate exam score"

quietly generate S3_math_bl=math_405 if math_405!=-99
label variable S3_math_bl "BSQ405: Aggregate score on S3 math exam"

**********************************************************************
*																	 *
* 		SECTION 5: Labor market and entrepreneurial experience		 *
*																	 *
**********************************************************************
quietly generate earn_money_bl=(earn_500==1)
label variable earn_money_bl "BSQ500: currently earning money (excluding pocket money)"

/*income source: code 3 of source_501 counts toward both personal business and employment*/
quietly generate personal_business_bl=(inlist(source_501,1,3) & earn_money_bl==1)
label variable personal_business_bl "BSQ501: earns money from personal business"
quietly generate employed_bl=(inlist(source_501,2,3) & earn_money_bl==1)
label variable employed_bl "BSQ501: earns money from employment"

/*earnings: reported amount for earners (-99 left missing); earnings=0 if "No" to earning money question*/
quietly generate earn_last2mths_bl=amt_503 if earn_money_bl==1 & amt_503!=-99
quietly replace earn_last2mths_bl=0 if earn_500==0
label variable earn_last2mths_bl "BSQ503: earnings from business & employment, last 2 months, FRW"
/*converted at the exchange rate on 1 Mar 2016: http://www.exchangerates.org.uk/USD-RWF-exchange-rate-history-full.html*/
quietly generate earn_last2mths_usd_bl=earn_last2mths_bl/`xrate'
label variable earn_last2mths_usd_bl "BSQ503: earnings from business & employment, last 2 months, USD"

******************************************************
* 			SECTION 6: Business creation			 *
******************************************************
qui gen ownbusiness_bl=(ownbusns_600==1)
/*NOTE(review): this condition previously tested profbus_601==1 twice (==1|==1); a second category code 
	may have been intended -- confirm against the questionnaire. The result is unchanged.*/
qui gen ownbusiness_ag_bl=(ifyes_ownbusns_600profbus_601==1 & ownbusns_600==1)
qui gen ownbusiness_nonag_bl=(ownbusiness_ag_bl==0 & ownbusns_600==1)
/*owned for at least a year by either the months or the days report. Because & binds tighter than |, 
	the two duration tests are grouped explicitly so the ownership condition applies to both.*/
qui gen ownbusiness_yr_bl=(((ifyes_ownbusns_600months>=12 & ifyes_ownbusns_600months!=.)|  ///
	(ifyes_ownbusns_600days_602>=365 & ifyes_ownbusns_600days_602!=.)) & ownbusns_600==1)
qui gen ownbusiness_grpprj_bl=(ifyes_ownbusns_600grpproj_603==1 & ownbusns_600==1)
/*move the multiple-response items next to each other so the varlist ranges below are contiguous*/
order ifyes_ownbusns_600with_604_?
qui egen buspartners_family_bl=anymatch(ifyes_ownbusns_600with_604_1-ifyes_ownbusns_600with_604_4), v(1 2 4)
qui egen buspartners_school_bl=anymatch(ifyes_ownbusns_600with_604_1-ifyes_ownbusns_600with_604_4), v(6)
qui egen buspartners_none_bl=anymatch(ifyes_ownbusns_600with_604_1-ifyes_ownbusns_600with_604_4), v(10)
/*starting capital: negative values are left missing*/
qui gen startcapital_bl=ifyes_ownbusns_600amt_605 if ifyes_ownbusns_600amt_605>=0 & ownbusns_600==1
qui gen startcapital_usd_bl=startcapital_bl/`xrate' /*exchange rate on 1 Mar 2016: http://www.exchangerates.org.uk/USD-RWF-exchange-rate-history-full.html*/
order ifyes_ownbusns_600getmoney_606?
qui egen howgotcapital_family_bl=anymatch(ifyes_ownbusns_600getmoney_6061-ifyes_ownbusns_600getmoney_6063), v(1 2 4)
qui egen howgotcapital_school_bl=anymatch(ifyes_ownbusns_600getmoney_6061-ifyes_ownbusns_600getmoney_6063), v(6)
qui egen howgotcapital_none_bl=anymatch(ifyes_ownbusns_600getmoney_6061-ifyes_ownbusns_600getmoney_6063), v(10)
qui gen business_earn_bl=(ifyes_ownbusns_600earn_607==1 & ownbusns_600==1)
/*business income: negative values are left missing*/
qui gen business_inc_last2mths_bl=ifyes_ownbusns_600amtbus_608 if ifyes_ownbusns_600amtbus_608>=0 & business_earn_bl==1
qui gen business_inc_last2mths_usd_bl=business_inc_last2mths_bl/`xrate' /*exchange rate on 1 Mar 2016: http://www.exchangerates.org.uk/USD-RWF-exchange-rate-history-full.html*/
qui gen business_inc_last2mths_any_bl=(business_inc_last2mths_bl>0 & business_inc_last2mths_bl!=.)
qui gen business_keep_bl=(ifyes_ownbusns_600contbus_609==1 & ownbusns_600==1)


/*business characteristics are defined only for students who own a business*/
foreach x in ag yr grpprj {
	qui replace ownbusiness_`x'_bl=. if ownbusns_600!=1
}
/*"none" cannot hold together with a family or school response*/
foreach y in buspartners howgotcapital {
	foreach x in family school {
		qui replace `y'_none_bl=0 if `y'_`x'_bl==1
	}

}

lab var ownbusiness_bl "BSQ600: owns a business back home, started by self"
lab var ownbusiness_ag_bl "BSQ601: own business in agriculture"
lab var ownbusiness_nonag_bl "BSQ601: own non-agricultural business"
lab var ownbusiness_yr_bl "BSQ602: owned business at least 1 year"
lab var ownbusiness_grpprj_bl "BSQ603: started business as group project"
foreach x in family school none {
	lab var buspartners_`x'_bl "BSQ604: business partners: `x'"
	lab var howgotcapital_`x'_bl "BSQ606: how got starting capital: `x'"
}
lab var startcapital_bl "BSQ605: starting capital for business (FRW)"
lab var startcapital_usd_bl "BSQ605: starting capital for business (USD)"
lab var business_earn_bl "BSQ607: owns a business that earns money"
lab var business_inc_last2mths_bl "BSQ608: income from business in last 2 months (FRW)"
lab var business_inc_last2mths_usd_bl "BSQ608: income from business in last 2 months (USD)"
/*this indicator previously had no variable label*/
lab var business_inc_last2mths_any_bl "BSQ608: any income from business in last 2 months"
lab var business_keep_bl "BSQ609: plans to continue current business"

******************************************************
* 			SECTION 7: Employment at home			 *
******************************************************
/*paid work and job search during the last holiday*/
quietly generate job_holiday_bl=(paid_700==1)
label variable job_holiday_bl "BSQ700: paid to work by someone else last holiday"
/*job search is defined only for students without a holiday job; missing otherwise*/
quietly generate jobsrch_holiday_bl=(seekjob_701_1==0) if job_holiday_bl!=1
label variable jobsrch_holiday_bl "BSQ701: searching for work last holiday (if not employed)"

/*days worked, for students with a holiday job; values above 60 are set to 60*/
quietly generate dayswork_holiday_bl=dayspaid_702 if job_holiday_bl==1
quietly replace dayswork_holiday_bl=60 if dayswork_holiday_bl>60 & dayswork_holiday_bl!=.
label variable dayswork_holiday_bl "BSQ702: days worked last holiday (top-coded at 60)"

/*daily earnings, for students with a holiday job; negative values are left missing*/
quietly generate earnday_holiday_bl=earnday_703 if earnday_703>=0 & job_holiday_bl==1
label variable earnday_holiday_bl "BSQ703: daily earnings last holiday (FRW)"
/*converted at the exchange rate on 1 Mar 2016: http://www.exchangerates.org.uk/USD-RWF-exchange-rate-history-full.html*/
quietly generate earnday_holiday_usd_bl=earnday_holiday_bl/`xrate'
label variable earnday_holiday_usd_bl "BSQ703: daily earnings last holiday (USD)"

/*part-time paid work during the school term*/
quietly generate parttime_schl_bl=inlist(ptimework_704_1,1,2)
label variable parttime_schl_bl "BSQ704: working part-time for pay during school term"
quietly generate parttime_schl_same_bl=(ptimework_704_1==2)
label variable parttime_schl_same_bl "BSQ704: working part-time for pay during school term, same job as holidays"
quietly generate parttime_schl_diff_bl=(ptimework_704_1==1)
label variable parttime_schl_diff_bl "BSQ704: working part-time for pay during school term, different job than holidays"

/*unpaid work*/
quietly generate work_nopay_bl=inlist(withoutpay_705,1,2)
label variable work_nopay_bl "BSQ705: working without pay"
quietly generate work_nopay_fam_bl=(withoutpay_705==1)
label variable work_nopay_fam_bl "BSQ705: working without pay for family business"

/*chores: any of the three activity responses takes one of the listed codes. 
	The activity items are moved together first so the varlist range is contiguous.*/
order activ_706_?
quietly egen chores_bl=anymatch(activ_706_1-activ_706_3), v(1 2 4 5 6 7 9)
label variable chores_bl "BSQ706: does household chores regularly when at home"

quietly generate pocketmoney_bl=(pcktmoney_707==1)
label variable pocketmoney_bl "BSQ707: regularly receives pocket money while at home"

**********************************************************************
* 	SECTION 8: Entrepreneurship Skills and Personal Finance			 *
**********************************************************************
qui gen borrow_bl=(borrow_800==1)
/*patience items: 1 if response code 2 is chosen; missing and -55 responses are set to missing*/
qui gen wait_10k_bl=(moneyoffer_801==2)
qui gen wait_20k_bl=(moneyoffer_802==2)
qui replace wait_10k_bl=. if moneyoffer_801==.|moneyoffer_801==-55
qui replace wait_20k_bl=. if moneyoffer_802==.|moneyoffer_802==-55
qui gen compound_interest_bl=(savings_803==1)
/*has savings if savedmon_804 takes any of the listed codes; code 1 is excluded here and in the 
	savings bands below, i.e. it is treated as not having savings*/
qui egen anysavings_bl=anymatch(savedmon_804), v(-66 2 3 4 5 6 7)
qui gen savings_less5k_bl=(savedamt_805==1 & savedmon_804!=1)
qui gen savings_5kto10k_bl=(savedamt_805==2 & savedmon_804!=1)
qui gen savings_more10k_bl=(savedamt_805==3 & savedmon_804!=1)
/*savings bands conditional on having savings: defined only for students with anysavings_bl==1.
	The restriction was previously savedmon_804==1, the one category the bands above exclude, which 
	made these variables 0 for students without savings and missing for every student with savings.*/
foreach x in less5k 5kto10k more10k {
	qui gen savings_`x'_cond_bl=savings_`x'_bl if anysavings_bl==1
}
lab var borrow_bl "BSQ800: borrowed to take advantage of economic opportunity"
lab var wait_10k_bl "BSQ801: Prefer 10k FRW one month from now to 5k FRW today"
lab var wait_20k_bl "BSQ802: Prefer 20k FRW one month from now to 5k FRW today"
lab var compound_interest_bl "BSQ803: answer question about compound interest correctly"
lab var anysavings_bl "BSQ804: has money saved"
lab var savings_less5k_bl "BSQ805: savings less than 5k FRW"
lab var savings_5kto10k_bl "BSQ805: savings 5,000-10,000 FRW"
lab var savings_more10k_bl "BSQ805: savings more than 10k FRW"
lab var savings_less5k_cond_bl "BSQ805: savings less than 5k FRW (conditional on savings)"
lab var savings_5kto10k_cond_bl "BSQ805: savings 5,000-10,000 FRW (conditional on savings)"
lab var savings_more10k_cond_bl "BSQ805: savings more than 10k FRW (conditional on savings)"

**********************************************************************
* 	SECTION 9:  Youth Skill Development Scale			 			 *
**********************************************************************
/*no variables are constructed from Section 9 in this file*/

**********************************************************************
* 	SECTION 10:  Entrepreneurship Knowledge (Based on the curriculum)*			 			 
**********************************************************************
/*for each multiple-response item: count the selected options (value 1) separately among the options 
	treated as true and those treated as false, then divide by the number of options in each group*/
qui egen mrktskl_true_bl=anycount(mrktskl_1000_DevProBrand mrktskl_1000_Advert mrktskl_1000_VerbalProm), v(1)
qui egen mrktskl_false_bl=anycount(mrktskl_1000_KeepingAccount mrktskl_1000_ConducProQAssurance mrktskl_1000_BusinessReg mrktskl_1000_OpenBid mrktskl_1000_DivLabour), v(1)
qui gen mrktskl_true_pct_bl=mrktskl_true_bl/3
qui gen mrktskl_false_pct_bl=mrktskl_false_bl/5
qui egen buspln_true_bl=anycount(busplnskl_1001_BusiName busplnskl_1001_MarketgPlan), v(1)
qui egen buspln_false_bl=anycount(busplnskl_1001_ReceiFromSales busplnskl_1001_PandLStatemt busplnskl_1001_Audit), v(1)
qui gen buspln_true_pct_bl=buspln_true_bl/2
qui gen buspln_false_pct_bl=buspln_false_bl/3
/*profit calculation is correct when the answer equals 30000*/
qui gen profit_calculation_bl=(proftamt_1002==30000)
qui egen incrproft_true_bl=anycount(incrproft_1003_SellMorePaper incrproft_1003_IncreasPrice incrproft_1003_UseCheaperMat), v(1)
qui egen incrproft_false_bl=anycount(incrproft_1003_EmploySone), v(1)
qui gen incrproft_true_pct_bl=incrproft_true_bl/3
qui gen incrproft_false_pct_bl=incrproft_false_bl
qui gen busgrwth_indic_bl=(notbusgrth_1004==4)
qui gen profit_definition_bl=(knownoproft_1005_CostXmtSell==1)
/*strict version: the correct option is selected and none of the other three options is*/
qui gen profit_def_strict_bl=(knownoproft_1005_CostXmtSell==1 & knownoproft_1005_CostXlsSell==0 & knownoproft_1005_SellXls50Xs==0 & knownoproft_1005_Sellmt50Xs==0)
lab var mrktskl_true_bl "BSQ1000: number of true marketing skills identified (of 3)" 	
lab var mrktskl_false_bl "BSQ1000: number of false marketing skills identified (of 5)" 
lab var mrktskl_true_pct_bl "BSQ1000: % of true marketing skills identified (of 3)" 	
lab var mrktskl_false_pct_bl "BSQ1000: % of false marketing skills identified (of 5)" 
/*label text corrected: colon added after the item number*/
lab var buspln_true_bl "BSQ1001: number of true business plan elements identified (of 2)" 	
lab var buspln_false_bl "BSQ1001: number of false business plan elements identified (of 3)" 
lab var buspln_true_pct_bl "BSQ1001: % of true business plan elements identified (of 2)" 	
lab var buspln_false_pct_bl "BSQ1001: % of false business plan elements identified (of 3)" 
lab var profit_calculation_bl "BSQ1002: can calculate profit from example"
lab var incrproft_true_bl "BSQ1003: number of true ways to increase profit identified (of 3)" 	
lab var incrproft_false_bl "BSQ1003: number of false ways to increase profit identified (of 1)" 
/*label text corrected: previously read "true true ways" and "false true ways"*/
lab var incrproft_true_pct_bl "BSQ1003: % of true ways to increase profit identified (of 3)" 	
lab var incrproft_false_pct_bl "BSQ1003: % of false ways to increase profit identified (of 1)"
lab var busgrwth_indic_bl "BSQ1004: correctly identifies indicators of business growth"
lab var profit_definition_bl "BSQ1005: understands definition of profit"
lab var profit_def_strict_bl "BSQ1005: understands definition of profit, inc. no false responses"

/*knowledge index: row mean of the six correct-response measures (the false-option measures are not included)*/
qui egen eknowledge_index_bl=rowmean(mrktskl_true_pct_bl buspln_true_pct_bl profit_calculation_bl incrproft_true_pct_bl profit_definition_bl busgrwth_indic_bl)
lab var eknowledge_index_bl "BSQ1000-1005: mean of entrepreneurship knowledge questions"

**********************************************************************
* 				SECTION 11:  Aspirations 							 *			 			 
**********************************************************************
/*collapse schooling_1100 into six ordered categories; any response not listed below stays at 0*/
qui gen planned_schl_bl=0
qui replace planned_schl_bl=1 if schooling_1100==3
qui replace planned_schl_bl=2 if schooling_1100==4
qui replace planned_schl_bl=3 if schooling_1100==5|schooling_1100==6
qui replace planned_schl_bl=4 if schooling_1100==7
qui replace planned_schl_bl=5 if schooling_1100==8
lab def planned_schl_bl 0 "less than S6 or no answer" 1 "S6" 2 "TVET" 3 "diploma or university (A0/A1)" 4 "master's" 5 "doctorate"
lab val planned_schl_bl planned_schl_bl
/*one indicator per category, plus post-secondary = diploma/university or higher (TVET not included)*/
qui gen planned_schl_sec_bl=(planned_schl_bl==1)
qui gen planned_schl_tvet_bl=(planned_schl_bl==2)
qui gen planned_schl_univ_bl=(planned_schl_bl==3)
qui gen planned_schl_mast_bl=(planned_schl_bl==4)
qui gen planned_schl_phd_bl=(planned_schl_bl==5)
qui gen planned_schl_postsec_bl=(planned_schl_bl>=3 & planned_schl_bl!=.)
/*planned occupation and business plans*/
qui gen planned_occup_ag_bl=(occup_1101==1)
qui gen planned_occup_business_bl=(occup_1101==2)
qui gen planned_occup_pro_bl=(occup_1101==5)
qui gen planned_occup_busorpro_bl=(occup_1101==2|occup_1101==5)
qui gen planned_business_bl=(busnsftr_1102==2)
lab var planned_schl_bl "BSQ1100: highest level of schooling planned to complete"
lab var planned_schl_sec_bl "BSQ1100: highest level of schooling planned to complete: secondary"
lab var planned_schl_tvet_bl "BSQ1100: highest level of schooling planned to complete: TVET"
lab var planned_schl_univ_bl "BSQ1100: highest level of schooling planned to complete: diploma or university"
lab var planned_schl_mast_bl "BSQ1100: highest level of schooling planned to complete: master's"
lab var planned_schl_phd_bl "BSQ1100: highest level of schooling planned to complete: doctorate"
/*item number corrected: the variable is built from item 1100, but the label previously read BSQ100*/
lab var planned_schl_postsec_bl "BSQ1100: plans to attend post-secondary"
lab var planned_occup_ag_bl "BSQ1101: planned occupation: agriculture"
lab var planned_occup_business_bl "BSQ1101: planned occupation: business"
lab var planned_occup_pro_bl "BSQ1101: planned occupation: professional/salaried"
lab var planned_occup_busorpro_bl "BSQ1101: planned occupation: business or professional/salaried"
lab var planned_business_bl "BSQ1102: plans to start business after finishing school"

**********************************************************************
* 				SECTION 12:  Locus of control						 *			 			 
**********************************************************************
/*copy the five locus-of-control ratings under baseline names*/
quietly generate control_univ_bl=univ_1200
quietly generate control_housework_bl=hswork_1202
quietly generate control_children_bl=child_1204
quietly generate control_headboy_bl=head_1206
quietly generate control_safe_bl=safe_1208

/*indicators for selected reasons given with the ratings*/
quietly generate control_univ_lowscore_bl=(reas_1201==1)
quietly generate control_univ_money_bl=(reas_1201==3)
quietly generate control_univ_dontwant_bl=(reas_1201==5)
quietly generate control_univ_family_bl=(reas_1201==6)
quietly generate control_housework_parents_bl=(reas_1203==1)
quietly generate control_children_god_bl=(reas_1205==1)
quietly generate control_children_plan_bl=(reas_1205==2)
quietly generate control_children_spouse_bl=(reas_1205==3)
quietly generate control_children_couple_bl=(reas_1205==4)

/*ratings below 1 are set to missing before averaging*/
local ratings "control_univ_bl control_housework_bl control_children_bl control_headboy_bl control_safe_bl"
foreach r of local ratings {
	quietly replace `r'=. if `r'<1
}
quietly egen control_avg_bl=rowmean(`ratings')

/*labels: ratings, their average, then reasons*/
label variable control_univ_bl "BSQ1200: locus of control: attend university (1=no control, 10=total control)"
label variable control_housework_bl "BSQ1202: locus of control: amount of housework (1=no control, 10=total control)"
label variable control_children_bl "BSQ1204: locus of control: number of children (1=no control, 10=total control)"
label variable control_headboy_bl "BSQ1206: locus of control: be head boy/girl (1=no control, 10=total control)"
label variable control_safe_bl "BSQ1208: locus of control: safe when walking (1=no control, 10=total control)"
label variable control_avg_bl "BSQ1200/1202/1204/1206/1208: locus of control average: 1=no control, 10=total control"
label variable control_univ_lowscore_bl "BSQ1201: reason for university locus of control: might not have high enough points"
label variable control_univ_money_bl "BSQ1201: reason for university locus of control: might not have enough money"
label variable control_univ_dontwant_bl "BSQ1201: reason for university locus of control: don't want to attend"
label variable control_univ_family_bl "BSQ1201: reason for university locus of control: want to start family instead"
label variable control_housework_parents_bl "BSQ1203: reason for housework locus of control: parents decide"
label variable control_children_god_bl "BSQ1205: reason for children locus of control: God decides"
label variable control_children_plan_bl "BSQ1205: reason for children locus of control: family planning"
label variable control_children_spouse_bl "BSQ1205: reason for children locus of control: spouse decides"
label variable control_children_couple_bl "BSQ1205: reason for children locus of control: we decide as couple"

**********************************************************************
* 				SECTION 13:  Grit						 			 *			 			 
**********************************************************************
/*reverse-code items 1300-1303 so that 1=lowest grit, 5=highest. 
	Each raw item is copied to a scratch variable, reversed into the baseline variable, 
	value-labeled, and the scratch copy is dropped.*/
local qnum=1300
foreach item in ideasproj diffintrest passchang newproj {
	quietly generate `item'x=`item'_`qnum'
	recode `item'x (5=1)(4=2)(3=3)(2=4)(1=5), gen(`item'_bl)
	label define lbl_`item'bl 1 "very true" 2 "true" 3 "somehow true" 4 "not so true" 5 "not true"
	label values `item'_bl lbl_`item'bl
	drop `item'x
	local ++qnum
}

/*two summaries of the four reversed items: their row mean and their first principal component*/
local grit_items "ideasproj_bl diffintrest_bl passchang_bl newproj_bl"
quietly egen grit_raw_bl=rowmean(`grit_items')
quietly pca `grit_items'
quietly predict grit_pc1_bl

label variable ideasproj_bl "BSQ1300: New ideas and projects sometimes distract me from older projects (1=very true, 5=not true)"
label variable diffintrest_bl "BSQ1301: Difficult to stay interested in project that takes long time (1=very true, 5=not true)"
label variable passchang_bl "BSQ1302: My interests/passions change year to year (1=very true, 5=not true)"
label variable newproj_bl "BSQ1303: I become interested in new projects every few months (1=very true, 5=not true)"
label variable grit_raw_bl "BSQ1300-1303: Grit index, raw score (mean of 4 items, 1=lowest, 5=highest)"
label variable grit_pc1_bl "BSQ1300-1303: Grit, first principal component"

**********************************************************************
* 				SECTION 14:  Creativity						 		 *			 			 
**********************************************************************
* SECTION 14: Creativity
* Responses are scored according to "East African Youth Creativity Scale - FV.pdf"
* Bring in the cleaned versions of questions 1400-1401, matched on the survey key
quietly generate key=KEY
quietly save "$temp/studenttemp.dta", replace

quietly use "$rawdata/student/Section14_Q1400-1401_Cleaned_V12.dta", clear
local keepvars "proj_desc_1400 specify_1400 idea1_1401 idea2_1401"
keep key `keepvars'

/*NOTE(review): the cleaned Section 14 file is the master here, so any record found only in that 
	file (_merge==1) stays in the data -- confirm that every key also exists in the student file*/
merge 1:1 key using "$temp/studenttemp.dta", force
drop _merge


/*1400: business project idea*/
labelbook code_1400lab
tabulate specify_1400
tabulate proj_desc_1400 if specify_1400==""

/*1401: idea if lost key (two ideas)*/
foreach idea in idea1 idea2 {
	labelbook code_`idea'_1401lab
	tabulate `idea'_1401

}
/*1402: Picture #1*/

/*1403-1405: Pictures #2-#4*/
foreach drawing in drawing_1403 drawing_1404 drawing_1405 {
	labelbook `drawing'
	tabulate `drawing'

}
*log close

* enter ratings (auxiliary do-file, run without displaying its output)
run "$dofiles/00_dataprep/rwanda_student_baseline_modify1_aux1.do" 

* assess reconciled ratings
/*For each creativity item, report the correlation between the two raters' scores and Cohen's kappa*/
/*kappa between .61-.8 considered "substantial agreement" */
local creativity_items "proj_desc_1400 specify_1400 idea1_1401 idea2_1401 drawing_1403 drawing_1404 drawing_1405"
foreach item of local creativity_items {
	di "question=`item'"
	quietly correlate `item'_r1 `item'_r2
	di "correlation="r(rho)
	
	kap `item'_r1 `item'_r2, tab
}

* average the two raters' scores for each creativity item
foreach item of local creativity_items {
	quietly egen `item'_avg_bl=rowmean(`item'_r1 `item'_r2)
	label variable `item'_avg_bl "BSQ `item', creativity score (1-4, 4 highest), 2-rater average"
}

* save data
/*flag every record in this file as part of the baseline sample, then compress, label and save*/
quietly generate insample_bl=1
label variable insample_bl "in baseline sample"
quietly compress
label data "Student baseline survey (2016), modified from cleaned data"
quietly save "$cleandata/student_baseline_clean_jde.dta", replace

/*remove the intermediate files written during cleaning*/
foreach tmpfile in teachertemp studenttemp linktemp {
	erase "$temp/`tmpfile'.dta"
}

/*display the start and end times of the run*/
local end=`"$S_TIME"'
di "`start'"
di "`end'"
